-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix: ad creative url parsing--child links #22
Changes from 16 commits
b3cd219
4b8d4a5
38c250c
e9be935
a06a43c
edb5000
c417527
9876468
cb99004
187f69b
a5469e6
864bf1b
7dd25b0
27a9e3b
859612b
dcf3e5f
b24e335
7b55c9e
868b1be
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,39 +8,55 @@ | |
{% macro default__stitch_fb_ad_creatives() %} | ||
|
||
with base as ( | ||
|
||
select * from {{ var('ad_creatives_table') }} | ||
|
||
), | ||
|
||
child_links as ( | ||
|
||
select * from {{ ref('fb_ad_creatives__child_links') }} | ||
|
||
), | ||
|
||
links_joined as ( | ||
|
||
select | ||
|
||
id as creative_id, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you've added a join to this CTE, which now makes it very challenging to tell which fields come from which of the tables that are being joined. please prefix all fields with the table that they're coming from in this situation so that others can understand your code when they read through it! |
||
lower(nullif(url_tags, '')) as url_tags, | ||
lower(coalesce( | ||
nullif(child_link, ''), | ||
nullif(object_story_spec__link_data__call_to_action__value__link, ''), | ||
nullif(object_story_spec__video_data__call_to_action__value__link, ''), | ||
nullif(object_story_spec__link_data__link, '') | ||
)) as url | ||
|
||
from | ||
{{ var('ad_creatives_table') }} | ||
from base | ||
left join child_links | ||
on base.id = child_links.creative_id | ||
|
||
), | ||
|
||
), splits as ( | ||
splits as ( | ||
|
||
select | ||
|
||
creative_id, | ||
url, | ||
links_joined.creative_id, | ||
links_joined.url, | ||
{{ dbt_utils.split_part('url', "'?'", 1) }} as base_url, | ||
--this is a strange thing to have to do but it's because sometimes | ||
--the URL exists on the story object and we wouldn't get the appropriate | ||
--UTM params here otherwise | ||
coalesce(url_tags, {{ dbt_utils.split_part('url', "'?'", 2) }} ) as url_tags | ||
coalesce(links_joined.url_tags, {{ dbt_utils.split_part('url', "'?'", 2) }} ) as url_tags | ||
|
||
from base | ||
from links_joined | ||
|
||
) | ||
|
||
select | ||
|
||
*, | ||
splits.*, | ||
{{ dbt_utils.get_url_host('url') }} as url_host, | ||
'/' || {{dbt_utils.get_url_path('url') }} as url_path, | ||
|
||
|
@@ -54,34 +70,50 @@ from splits | |
{% macro snowflake__stitch_fb_ad_creatives() %} | ||
|
||
with base as ( | ||
|
||
select * from {{ var('ad_creatives_table') }} | ||
|
||
), | ||
|
||
child_links as ( | ||
|
||
select * from {{ ref('fb_ad_creatives__child_links') }} | ||
|
||
), | ||
|
||
links_joined as ( | ||
|
||
select | ||
|
||
id as creative_id, | ||
lower(coalesce( | ||
nullif(object_story_spec['link_data']['call_to_action']['value']['link']::varchar, ''), | ||
nullif(object_story_spec['video_data']['call_to_action']['value']['link']::varchar, ''), | ||
nullif(object_story_spec['link_data']['link']::varchar, '') | ||
)) as url, | ||
|
||
url_tags | ||
|
||
from {{ var('ad_creatives_table') }} | ||
|
||
nullif(child_link, ''), | ||
nullif(base.object_story_spec['link_data']['call_to_action']['value']['link']::varchar, ''), | ||
nullif(base.object_story_spec['video_data']['call_to_action']['value']['link']::varchar, ''), | ||
nullif(base.object_story_spec['link_data']['link']::varchar, '') | ||
)) as url, | ||
|
||
url_tags | ||
|
||
from base | ||
left join child_links | ||
on base.id = child_links.creative_id | ||
|
||
), | ||
|
||
parsed as ( | ||
|
||
select | ||
|
||
links_joined.*, | ||
creative_id, | ||
url, | ||
{{ dbt_utils.get_url_host('url') }} as url_host, | ||
'/' || {{dbt_utils.get_url_path('url') }} as url_path, | ||
|
||
{{ facebook_ads.get_url_parameter() }} | ||
|
||
from base | ||
from links_joined | ||
|
||
) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
-- There are cases where ads can have multiple links attached to them. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The core logic that you're trying to do across redshift and snowflake is identical here, it's just that Stitch loads the data in differently. My recommendation would be to create a base model that normalizes the schemas across both Redshift and snowflake, and then do the transformation in a single model that doesn't need two different adapters because the underlying data is identical. We can accomplish what you're doing here much more simply and minimize the maintenance burden for future analysts. |
||
-- This model was created to extract all the child links and return the first | ||
-- instance where a URL contains utm parameters. | ||
|
||
|
||
{% macro stitch_fb_ad_creatives__child_links() %} | ||
|
||
{{ adapter_macro('facebook_ads.stitch_fb_ad_creatives__child_links') }} | ||
|
||
{% endmacro %} | ||
|
||
{% macro default__stitch_fb_ad_creatives__child_links() %} | ||
|
||
|
||
with base as ( | ||
|
||
select * from {{ var('ad_creatives__child_links_table') }} | ||
|
||
), | ||
|
||
child_attachment_links as ( | ||
|
||
select | ||
|
||
_sdc_source_key_id as creative_id, | ||
_sdc_batched_at, | ||
link as child_link | ||
|
||
from base | ||
where child_link ilike '%utm%' | ||
|
||
), | ||
|
||
aggregated as ( | ||
|
||
select distinct | ||
|
||
creative_id, | ||
first_value(child_link) over ( | ||
partition by creative_id | ||
order by _sdc_batched_at | ||
rows between unbounded preceding and unbounded following | ||
) as child_link | ||
|
||
from child_attachment_links | ||
|
||
) | ||
|
||
select * from aggregated | ||
|
||
{% endmacro %} | ||
|
||
|
||
{% macro snowflake__stitch_fb_ad_creatives__child_links() %} | ||
|
||
|
||
{% set fields = [ | ||
|
||
'object_story_spec', | ||
'child_link', | ||
|
||
]%} | ||
|
||
with base as ( | ||
|
||
select * from {{ var('ad_creatives_table') }} | ||
|
||
), | ||
|
||
child_attachment_links as ( | ||
|
||
select | ||
*, | ||
attachments.value:link::varchar as child_link | ||
|
||
from base, | ||
lateral flatten (input => object_story_spec:link_data:child_attachments) attachments | ||
where child_link ilike '%utm%' | ||
|
||
), | ||
|
||
aggregated as ( | ||
|
||
select distinct | ||
id as creative_id, | ||
|
||
{% for field in fields %} | ||
first_value({{ field }}) over (partition by id | ||
order by _sdc_batched_at | ||
rows between unbounded preceding and unbounded following) | ||
as {{ field }} | ||
{% if not loop.last%} , {% endif %} | ||
{% endfor %} | ||
|
||
from child_attachment_links | ||
|
||
) | ||
|
||
select * from aggregated | ||
|
||
{% endmacro %} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{{ | ||
config( | ||
enabled = var('etl') == 'stitch' | ||
) | ||
}} | ||
|
||
{{ stitch_fb_ad_creatives__child_links() }} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
almost all of the implementation of this model across redshift and snowflake is identical. we should refactor this to make the code more DRY.